{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "source": [
        "!pip install -qU langchain-core langchain-upstage\n",
        "\n",
        "import os\n",
        "import json\n",
        "from typing import List, Dict, Any\n",
        "from langchain_upstage import UpstageGroundednessCheck\n",
        "\n",
        "os.environ[\"UPSTAGE_API_KEY\"] = \"Use Your API Key Here\""
      ],
      "metadata": {
        "id": "gzziSfRNqiW3"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class AdvancedGroundednessChecker:\n",
        "    \"\"\"Advanced wrapper for Upstage Groundedness Check with batch processing and analysis\"\"\"\n",
        "\n",
        "    def __init__(self):\n",
        "        self.checker = UpstageGroundednessCheck()\n",
        "        self.results = []\n",
        "\n",
        "    def check_single(self, context: str, answer: str) -> Dict[str, Any]:\n",
        "        \"\"\"Check groundedness for a single context-answer pair\"\"\"\n",
        "        request = {\"context\": context, \"answer\": answer}\n",
        "        response = self.checker.invoke(request)\n",
        "\n",
        "        result = {\n",
        "            \"context\": context,\n",
        "            \"answer\": answer,\n",
        "            \"grounded\": response,\n",
        "            \"confidence\": self._extract_confidence(response)\n",
        "        }\n",
        "        self.results.append(result)\n",
        "        return result\n",
        "\n",
        "    def batch_check(self, test_cases: List[Dict[str, str]]) -> List[Dict[str, Any]]:\n",
        "        \"\"\"Process multiple test cases\"\"\"\n",
        "        batch_results = []\n",
        "        for case in test_cases:\n",
        "            result = self.check_single(case[\"context\"], case[\"answer\"])\n",
        "            batch_results.append(result)\n",
        "        return batch_results\n",
        "\n",
        "    def _extract_confidence(self, response) -> str:\n",
        "        \"\"\"Extract confidence level from response\"\"\"\n",
        "        if hasattr(response, 'lower'):\n",
        "            if 'grounded' in response.lower():\n",
        "                return 'high'\n",
        "            elif 'not grounded' in response.lower():\n",
        "                return 'low'\n",
        "        return 'medium'\n",
        "\n",
        "    def analyze_results(self) -> Dict[str, Any]:\n",
        "        \"\"\"Analyze batch results\"\"\"\n",
        "        total = len(self.results)\n",
        "        grounded = sum(1 for r in self.results if 'grounded' in str(r['grounded']).lower())\n",
        "\n",
        "        return {\n",
        "            \"total_checks\": total,\n",
        "            \"grounded_count\": grounded,\n",
        "            \"not_grounded_count\": total - grounded,\n",
        "            \"accuracy_rate\": grounded / total if total > 0 else 0\n",
        "        }\n",
        "\n",
        "checker = AdvancedGroundednessChecker()"
      ],
      "metadata": {
        "id": "wIU2r7XwqmxD"
      },
      "execution_count": 5,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "print(\"=== Test Case 1: Height Discrepancy ===\")\n",
        "result1 = checker.check_single(\n",
        "    context=\"Mauna Kea is an inactive volcano on the island of Hawai'i. Its peak is 4,207.3 m above sea level, making it the highest point in Hawaii and second-highest peak of an island on Earth.\",\n",
        "    answer=\"Mauna Kea is 5,207.3 meters tall.\"\n",
        ")\n",
        "print(f\"Result: {result1['grounded']}\")\n",
        "\n",
        "print(\"\\n=== Test Case 2: Correct Information ===\")\n",
        "result2 = checker.check_single(\n",
        "    context=\"Python is a high-level programming language created by Guido van Rossum in 1991. It emphasizes code readability and simplicity.\",\n",
        "    answer=\"Python was created by Guido van Rossum and focuses on code readability.\"\n",
        ")\n",
        "print(f\"Result: {result2['grounded']}\")\n",
        "\n",
        "print(\"\\n=== Test Case 3: Partial Information ===\")\n",
        "result3 = checker.check_single(\n",
        "    context=\"The Great Wall of China is approximately 13,000 miles long and took over 2,000 years to build.\",\n",
        "    answer=\"The Great Wall of China is very long.\"\n",
        ")\n",
        "print(f\"Result: {result3['grounded']}\")\n",
        "\n",
        "print(\"\\n=== Test Case 4: Contradictory Information ===\")\n",
        "result4 = checker.check_single(\n",
        "    context=\"Water boils at 100 degrees Celsius at sea level atmospheric pressure.\",\n",
        "    answer=\"Water boils at 90 degrees Celsius at sea level.\"\n",
        ")\n",
        "print(f\"Result: {result4['grounded']}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Uk_JjdLPqtk7",
        "outputId": "1985bf5d-7fec-4f8a-81ea-14349d41b6b4"
      },
      "execution_count": 6,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "=== Test Case 1: Height Discrepancy ===\n",
            "Result: notGrounded\n",
            "\n",
            "=== Test Case 2: Correct Information ===\n",
            "Result: grounded\n",
            "\n",
            "=== Test Case 3: Partial Information ===\n",
            "Result: grounded\n",
            "\n",
            "=== Test Case 4: Contradictory Information ===\n",
            "Result: notGrounded\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(\"\\n=== Batch Processing Example ===\")\n",
        "test_cases = [\n",
        "    {\n",
        "        \"context\": \"Shakespeare wrote Romeo and Juliet in the late 16th century.\",\n",
        "        \"answer\": \"Romeo and Juliet was written by Shakespeare.\"\n",
        "    },\n",
        "    {\n",
        "        \"context\": \"The speed of light is approximately 299,792,458 meters per second.\",\n",
        "        \"answer\": \"Light travels at about 300,000 kilometers per second.\"\n",
        "    },\n",
        "    {\n",
        "        \"context\": \"Earth has one natural satellite called the Moon.\",\n",
        "        \"answer\": \"Earth has two moons.\"\n",
        "    }\n",
        "]\n",
        "\n",
        "batch_results = checker.batch_check(test_cases)\n",
        "for i, result in enumerate(batch_results, 1):\n",
        "    print(f\"Batch Test {i}: {result['grounded']}\")\n",
        "\n",
        "print(\"\\n=== Results Analysis ===\")\n",
        "analysis = checker.analyze_results()\n",
        "print(f\"Total checks performed: {analysis['total_checks']}\")\n",
        "print(f\"Grounded responses: {analysis['grounded_count']}\")\n",
        "print(f\"Not grounded responses: {analysis['not_grounded_count']}\")\n",
        "print(f\"Groundedness rate: {analysis['accuracy_rate']:.2%}\")\n",
        "\n",
        "print(\"\\n=== Multi-domain Testing ===\")\n",
        "domains = {\n",
        "    \"Science\": {\n",
        "        \"context\": \"Photosynthesis is the process by which plants convert sunlight, carbon dioxide, and water into glucose and oxygen.\",\n",
        "        \"answer\": \"Plants use photosynthesis to make food from sunlight and CO2.\"\n",
        "    },\n",
        "    \"History\": {\n",
        "        \"context\": \"World War II ended in 1945 after the surrender of Japan following the atomic bombings.\",\n",
        "        \"answer\": \"WWII ended in 1944 with Germany's surrender.\"\n",
        "    },\n",
        "    \"Geography\": {\n",
        "        \"context\": \"Mount Everest is the highest mountain on Earth, located in the Himalayas at 8,848.86 meters.\",\n",
        "        \"answer\": \"Mount Everest is the tallest mountain and is located in the Himalayas.\"\n",
        "    }\n",
        "}\n",
        "\n",
        "for domain, test_case in domains.items():\n",
        "    result = checker.check_single(test_case[\"context\"], test_case[\"answer\"])\n",
        "    print(f\"{domain}: {result['grounded']}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "c4fQZ6vZqvwc",
        "outputId": "300507c5-c213-4ab2-fba8-fc285333fafa"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "=== Batch Processing Example ===\n",
            "Batch Test 1: grounded\n",
            "Batch Test 2: grounded\n",
            "Batch Test 3: notGrounded\n",
            "\n",
            "=== Results Analysis ===\n",
            "Total checks performed: 7\n",
            "Grounded responses: 7\n",
            "Not grounded responses: 0\n",
            "Groundedness rate: 100.00%\n",
            "\n",
            "=== Multi-domain Testing ===\n",
            "Science: grounded\n",
            "History: notGrounded\n",
            "Geography: grounded\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "def create_test_report(checker_instance):\n",
        "    \"\"\"Generate a detailed test report\"\"\"\n",
        "    report = {\n",
        "        \"summary\": checker_instance.analyze_results(),\n",
        "        \"detailed_results\": checker_instance.results,\n",
        "        \"recommendations\": []\n",
        "    }\n",
        "\n",
        "    accuracy = report[\"summary\"][\"accuracy_rate\"]\n",
        "    if accuracy < 0.7:\n",
        "        report[\"recommendations\"].append(\"Consider reviewing answer generation process\")\n",
        "    if accuracy > 0.9:\n",
        "        report[\"recommendations\"].append(\"High accuracy - system performing well\")\n",
        "\n",
        "    return report\n",
        "\n",
        "print(\"\\n=== Final Test Report ===\")\n",
        "report = create_test_report(checker)\n",
        "print(f\"Overall Performance: {report['summary']['accuracy_rate']:.2%}\")\n",
        "print(\"Recommendations:\", report[\"recommendations\"])\n",
        "\n",
        "print(\"\\n=== Tutorial Complete ===\")\n",
        "print(\"This tutorial demonstrated:\")\n",
        "print(\"• Basic groundedness checking\")\n",
        "print(\"• Batch processing capabilities\")\n",
        "print(\"• Multi-domain testing\")\n",
        "print(\"• Results analysis and reporting\")\n",
        "print(\"• Advanced wrapper implementation\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pRB0AyCasQ4s",
        "outputId": "3b4eca53-51c1-4784-a9d3-310de49a36a8"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "\n",
            "=== Final Test Report ===\n",
            "Overall Performance: 100.00%\n",
            "Recommendations: ['High accuracy - system performing well']\n",
            "\n",
            "=== Tutorial Complete ===\n",
            "This tutorial demonstrated:\n",
            "• Basic groundedness checking\n",
            "• Batch processing capabilities\n",
            "• Multi-domain testing\n",
            "• Results analysis and reporting\n",
            "• Advanced wrapper implementation\n"
          ]
        }
      ]
    }
  ]
}